
/*

 THIS .DO FILE PERFORMS A DECOMPOSITION OF FIRM-DESTINATION LEVEL GROWTH RATES INTO
 SEVERAL COMPONENTS INCLUDING:
	- MACRO (AGGREGATE) COMPONENT
	- SECTOR (INDUSTRY) COMPONENT
	- FIRM (INDIVIDUAL, IDIOSYNCRATIC) COMPONENT
	
	ROBUSTNESS:
	- ZONE (LOCAL) COMPONENT
	
 PROGRAM: "microestimation.do"
 
 THIS FILE:
 - USES "$data" AS THE MASTER DATA FILE
   - "$data" SHOULD CONTAIN FIRM-DESTINATION LEVEL REVENUES AT THE YEARLY LEVEL
   
 - CREATES THE PREVIOUSLY MENTIONED COMPONENTS AT THE FIRM-DESTINATION LEVEL FOR DIFFERENT SPECIFICATIONS:
   1. FIXED EFFECTS (FE) APPROACH ON FIRM-DESTINATION LEVEL REVENUE
   2. FE APPROACH ON FIRM-DESTINATION LEVEL REVENUE AND INTERACTIONS OF FE WITH FIRM-LEVEL CONTROLS
   - FIRM-LEVEL HETEROGENEOUS RESPONSE
   3. FE APPROACH WITH SECTOR-DESTINATION AND LOCAL MARKET EFFECTS ON FIRM-DESTINATION LEVEL REVENUE
   * NOTE: - WE SKIP ANY SPECIFICATION WITH VALUE ADDED AS IT IS ONLY AVAILABLE FOR MANUFACTURING
           - FURTHERMORE, IT IS ONLY USED AS ROBUSTNESS IN DGLM'14
   
 - CALLS UPON A SET OF .DO FILES:
   1. "dGLM_2_growth_regs.do"
     - FE REGRESSION FOR A SINGLE COUNTRY * YEAR
   2. "dGLM_4_threesteps_growth_regs.do"
     - FE REGRESSION, STORING OF RESULTS AND DECOMPOSITION OF FIRM-LEVEL IDIOSYNCRATIC COMPONENT INTO FIRM-COMMON AND FIRM-DESTINATION SPECIFIC COMPONENTS
     - THIS FILE CALLS UPON/NESTS "dGLM_2_growth_regs.do"
   
 INPUTS:
 - WORKING PATH
 - NAME OF LOG FILE
 - TIME PERIODS (FIRST, SECOND, SECOND-LAST AND LAST YEAR)
 - VARIABLES OF THE SCRIPT/PROGRAM:
	- "data": NAME OF DATA SET
	- "samplename": EQUAL TO EITHER "ALL" (WHOLE ECONOMY) OR "MFG" (RESTRICTION TO MANUFACTURING ONLY)
	NOTE: WE WILL USE "ALL" (WHOLE ECONOMY) AS A BASELINE
	- "refsect": "REFERENCE" SECTOR IN FE REGRESSIONS WHICH SERVES AS A "NORMALIZATION"
	- "sect": NAME OF VARIABLE IDENTIFYING SECTORS (EITHER "SIC" OR "NAICS")
	  - WE WILL USE "SIC2" AS BASELINE WHICH IS CONTAINED IN VARIABLE "industry2"
	NOTE: WE MIGHT NEED TO USE "NAICS" (RATHER THAN "SIC2/SIC3") FOR TIME CONSISTENCY ACROSS YEARS IN LBD-E/LBD-F
	- "controls": SET OF FIRM-LEVEL VARIABLES USED AS CONTROL IN ROBUSTNESS CHECKS (ALLOWING FOR HETEROGENEOUS RESPONSES)
	  - SIZE (EMPLOYMENT COUNT/PAY ROLL (WAGES)), AGE AND OPENNESS (FRACTION OF REVENUE THAT IS EXPORTED)
	  
 "$data" MUST CONTAIN THE FOLLOWING VARIABLES:
 - year: TIME IDENTIFIER
 - firmid: FIRM IDENTIFIER
 - $sect: SECTOR IDENTIFIER (SIC/NAICS)
   - "industry2" (SIC2)
 - country: DESTINATION (COUNTRY) IDENTIFIER
 - gr_$variable: ANNUAL GROWTH RATES OF SALES BY FIRM-DESTINATION-YEAR
 - revctry: CURRENT VALUE OF MARKET-SPECIFIC SALES
   NOTE: THIS IS THE VARIABLE CONTAINED IN THE GLOBAL $variable
 - lag_revctry: LAGGED VALUE OF MARKET-SPECIFIC SALES
 - FIRM-LEVEL CONTROL VARIABLES
   - size:
     - emp
     - revenue
     - pay
   - age
   - openness
 - out_gr: A DUMMY EQUAL TO ONE IF THE GROWTH RATE FALLS INTO THE CATEGORY OF OUTLIERS (SPECIFIED MANUALLY)
   NOTE: DGLM'14 DEFINE "out_gr==1" IF gr_$variable FALLS OUTSIDE THE RANGE "-0.5 < gr_$variable < 1"
 - loc: LOCAL MARKET IDENTIFIER (COUNTY/STATE)
   NOTE: DGLM'14 DEFINE A ZONE TO BE "ZONE D'EMPLOI" WHICH IS COMPARABLE TO SOMETHING BETWEEN MSA/COUNTY
 
 OUTPUT:
 1. $PthOut/growth_regs_$variable_$samplename
   - DECOMPOSITION ON TOTAL REVENUE

*/

*** HOUSEKEEPING ***
* Reset data, programs and timers. Global macros are not dropped by "clear all",
* so globals set by a calling .do file are still visible below.
clear all

set more off
timer clear

* Working paths
* NOTE(review): the four path globals are left blank in this file; presumably they
* have to be filled in before the script is run - confirm.
global base 
global DoPth 
global PthIn 
global PthOut

* Time span
global firsty 1994
global secondy 1995
global lasty 2011

* Globals of program
global data = "firm_dest_LBD_rev_master"
global samplename = "all"
* "variable" (outcome variable) and "varsamp" (tag used in the log and output file
* names) are referenced throughout this file (gr_$variable, lag_$variable, $varsamp).
* A value supplied by a calling .do file is kept; otherwise the baseline is used so
* that these names never expand to empty strings.
if "$variable" == "" {
	global variable "revctry"
}
if "$varsamp" == "" {
	global varsamp "${variable}_${samplename}"
}
global refsect = "47"
* Note:  - choice of "reference" sector is ad hoc. 
*	 - dGLM'14 choose "Transportation" (NAF = 60) as "reference" sector.
*	 - To stay as close as possible to dGLM'14, we choose "Transportation Services" (SIC2 = 47) as "reference" sector
global sect = "industry2"
global controls "pct_size classage d_openness pct_pay"

* Log file
* Close a log left open by an earlier (aborted) run; "log using" fails if one is still open
capture log close
log using "$PthOut/log_microestimation_$varsamp", text replace

* Load in .do files that are called upon in the program
* (this has to happen after "clear all", which drops programs from memory)
cd "$DoPth"
quietly do dGLM_2_growth_regs
quietly do dGLM_4_threesteps_growth_regs
quietly do dGLM_9_matlab_prep

cd "$PthIn"


timer on 1

*** DECOMPOSITION OF GROWTH RATES INTO MACRO, SECTOR AND FIRM-SPECIFIC COMPONENTS ***

* LOADING AND PREPARING DATA
* The master data set is the one named in the global "data" (no hard-coded file name)
use "${data}.dta", clear

* Drop outliers and missing values
* missing() also removes extended missing values (.a, .b, ...), which "!=." would keep
drop if out_gr==1
drop if missing(gr_$variable)

* Create country and firm-destination identifiers
egen id_c = group(country)
egen id_loc = group(county)
* Note: - "country" is US Census Bureau's international country code
*	- "iso" is two letter coded equivalent (only slightly less complete than "country")
egen id_fn = group(firmid country)

* Tabulate observation counts by country and year, to spot country/year pairs
* with less than 10 (or 25) observations
tab id_c year 

* Summary statistics on individual growth rates (by year)
* Note: by construction, we cannot construct statistics on growth rates for first year ("firsty")
forvalues y = $secondy(1)$lasty  {
	disp "Year equals "`y' 
	sum gr_$variable if year==`y'
}

* Compute weights (by year)
* Note: 
*	- We are only constructing FIRM-DESTINATION weights
*	- SECTOR-DESTINATION weights will be constructed later (using currently constructed FIRM-DESTINATION weights) in MatLab master file
* total() is the current name of the deprecated egen function sum(); the result is the same
egen tot_$variable = total(lag_$variable), by(year)
gen w_fn = lag_$variable/tot_$variable
label var w_fn "weight of firm-destination level revenue in total revenue (yearly)"

*** Construct firm-level variables used as controls in robustness checks
* IMPORTANT: Stata treats a missing value as larger than any number, so a bare
* "x >= cutoff" is true when x is missing. Every class assignment below is therefore
* restricted to non-missing values; observations with a missing input get a missing
* class instead of being placed in the top class.

* Log size and percentiles (quintiles)
* Note: - use "emp" as a measure of size
*	- prevent mechanical relationships through use of "revenue" as it is already part of outcome variable
gen log_size = log(emp)
* sp20, sp40, sp60, sp80: yearly cut-offs in log(size) that separate the five quintile classes
foreach perc of numlist 20(20)80  {
	egen sp`perc' = pctile(log_size), p(`perc') by(year)
}

* Class = 1 + number of cut-offs reached (1 below sp20, ..., 5 at or above sp80)
gen pct_size = 1 + (log_size>=sp20) + (log_size>=sp40) + (log_size>=sp60) + (log_size>=sp80) if !missing(log_size)
* Drop the cut-offs by name so that no unrelated variable starting with "sp" is removed
drop sp20 sp40 sp60 sp80

* Log pay roll and percentiles (quintiles)
* Note: - use "wage bill" (or "pay roll") as a measure of size
*	- prevent mechanical relationships through use of "revenue" as it is already part of outcome variable
gen log_pay = log(pay)
* sp20, sp40, sp60, sp80: yearly cut-offs in log(pay) that separate the five quintile classes
foreach perc of numlist 20(20)80  {
	egen sp`perc' = pctile(log_pay), p(`perc') by(year)
}

gen pct_pay = 1 + (log_pay>=sp20) + (log_pay>=sp40) + (log_pay>=sp60) + (log_pay>=sp80) if !missing(log_pay)
drop sp20 sp40 sp60 sp80

* Age: classes (younger or equal to 5 years and older than 5 years) and log age
gen log_age = log(age+1)
gen classage = 0 if age<=5
* Without the missing() guard a missing age would be classified as "older than 5 years"
replace classage = 1 if age>5 & !missing(age)

* Trade openness (average ratio of foreign over total revenue over firm's life-cycle)
* NOTE(review): the factor 1000 presumably converts "revenue" to the units of "revenue_foreign" - confirm
gen revenue_total = revenue*1000
* One ratio per firm-year (first observation only), so that the firm-level mean
* is not weighted by the number of destinations served in a year
bys firmid year: gen open = revenue_foreign/revenue_total if _n==1
bys firmid: egen mean_open = mean(open)
replace mean_open = . if revenue_foreign==.

* Dummy: 1 if average openness exceeds 10 percent, 0 otherwise (including missing openness)
gen d_openness = (mean_open>0.1 & mean_open!=.)
drop open mean_open revenue_total

sort firmid country year

save "$PthOut/firm_dest_LBD_rev_master_temp.dta", replace

timer off 1
timer list 1

* DECOMPOSITION OF INDIVIDUAL GROWTH RATES

* Correlation matrix of the firm-level control variables listed in the global "controls"
correlate $controls

*** BENCHMARK ***

timer on 2

* Benchmark run: firm-destination level growth rates, no control variables
* - This generates the "benchmark" results in Appendix B
display "Benchmark specification: total revenue for $samplename"
* Arguments, in order:
*   1. input file
*   2. growth rate variable
*   3. output file
dGLM_4_threesteps_growth_regs $PthOut/firm_dest_LBD_rev_master_temp.dta gr_$variable $PthOut/growth_regs_$varsamp
* Summary statistics of gr_idio and Rsquared in the data left in memory by the call above
summarize gr_idio Rsquared

timer off 2
timer list 2

timer on 3

* Export to .csv format to import in Matlab
di "Export-import process for sample: $samplename"
* Inputs:
* - name of output file (.csv format)
* - 0 for "benchmark", 1 for "robustness" (heterogeneous response), 2 for "local"
* - name of LHS growth rate variable

use "$PthOut/growth_regs_$varsamp.dta", clear
* Name the export after the same "varsamp" tag as the file loaded above, so that a
* different variable or sample does not overwrite the baseline export.
* Falls back to the historical name when "varsamp" is not set.
local csvname "growth_regs_$varsamp"
if "$varsamp" == "" {
	local csvname "growth_regs_revctry_all"
}
dGLM_9_matlab_prep `csvname' 0 gr_$variable

timer off 3
timer list 3

* Close and save log file
log close
